#!/usr/bin/python
"""
Clean Penn Treebank sentences so that the Bikel parser or the Stanford parser can parse them.
	Drops every token containing "*", e.g. *PRO*, *T*-1, ...
	Converts bracket tokens: -LRB- -> (         -RRB- -> )
"""
import sys

# Treebank bracket escapes and the literal characters they stand for.
BRACKETS = {"-LRB-": "(", "-RRB-": ")"}


def clean_line(line):
    """Return *line* with '*' tokens dropped and bracket escapes converted.

    The line is split on whitespace. Any token containing "*" (for example
    *PRO* or *T*-1) is discarded, -LRB- becomes "(" and -RRB- becomes ")".
    Each converted bracket token is echoed to stderr followed by a space.
    The surviving tokens are joined with single spaces; no trailing newline
    is added.
    """
    kept = []
    # split() with no argument never yields empty strings, so no
    # empty-token check is needed.
    for token in line.split():
        if "*" in token:
            continue
        if token in BRACKETS:
            sys.stderr.write(token + " ")
            token = BRACKETS[token]
        kept.append(token)
    return " ".join(kept)


def main(argv=None):
    """Clean the file named by argv[1] and write the result to argv[2].

    argv defaults to sys.argv. One output line is written per input line,
    including for lines that end up empty after cleaning.
    """
    if argv is None:
        argv = sys.argv
    # open() rather than the Python-2-only file() builtin; the with-blocks
    # close both handles even if an error occurs while processing.
    with open(argv[1], 'r') as fi:
        with open(argv[2], 'w') as fo:
            for line in fi:
                fo.write(clean_line(line) + "\n")


if __name__ == "__main__":
    main()
